Source code for orca

# coding: utf-8

import pathlib
import re

import numpy as np
import pandas as pd
from pymatgen import Element, Molecule, Spin

"""
This module implements input and output processing from Orca.
"""

__author__ = "Hugo Santos-Silva, Germain Vallverdu"
__copyright__ = "Copyright 2016, UPPA-CNRS"
__version__ = "0.1"
__date__ = "Dec 11, 2016"

# convertion Bohr --> Angstrom
BOHR_TO_ANGS = 0.529177249
UA_TO_KCAL = 627.503


[docs]class OrcaInput:
    """
    An object representing an Orca input file.

    Args:
        mol: Input molecule. If molecule is a single string, it is used as a
            direct input to the geometry section of the input file.
        charge (float): Charge of the molecule. If None, charge on molecule is used.
            Defaults to None. This allows the input file to be set a
            charge independently from the molecule itself.
        spin_multiplicity (float): Spin multiplicity of molecule. Defaults to None,
            which means that the spin multiplicity is set to 1 if the
            molecule has no unpaired electrons and to 2 if there are
            unpaired electrons.
        input_parameters (list): Input parameters for run as a list
        blocks (list): Blocks input for advanced settings as a list of string. The
            block has to be already well formatted.
    """

    default_params = ["B3LYP Def2-SVP", "Opt"]

    def __init__(self, mol, charge=None, spin_multiplicity=None,
                 input_parameters=None, blocks=None):

        self._mol = mol
        self.input_parameters = input_parameters if input_parameters else self.default_params
        self.blocks = blocks if blocks else []

        if isinstance(mol, Molecule):
            self.charge = charge if charge else mol.charge
            self.spin_multiplicity = spin_multiplicity if spin_multiplicity else mol.spin_multiplicity
            self.molecule.set_charge_and_spin(self.charge, self.spin_multiplicity)
        else:
            self.charge = charge if charge else 0
            self.spin_multiplicity = spin_multiplicity if spin_multiplicity else 1
            print("WARNING: consistency between chage and spin multiplicity not verified.")

    @property
    def molecule(self):
        """
        Returns molecule associated with this OrcaInput.
        """
        return self._mol

[docs]    def get_string(self):
        """ Return a string representation of the input file """
        lines = ""
        # input parameters
        for param in self.input_parameters:
            lines += "! %s\n" % param
        # blocks
        for block in self.blocks:
            lines += block
        # geometry block
        lines += "* xyz %d %d\n" % (self.charge, self.spin_multiplicity)
        if isinstance(self.molecule, str):
            lines += self.molecule
        elif isinstance(self.molecule, Molecule):
            for site in self.molecule:
                lines += "%2s" % site.specie
                lines += "  %12.6f %12.6f %12.6f\n" % tuple(site.coords)
        else:
            raise TypeError("Bad Molecule object. I cannot export xyz coordinates."
                            "\nPlease, provides a string represenation or a pymatgen"
                            "Molecule object.")
        if lines[-1] != "\n":
            lines += "\n"
        lines += "*\n"
        return lines

    def __str__(self):
        return self.get_string()


[docs]class OrcaHessian:
    """
    Parser for ORCA Hessian file. All data are in atomic units.

    Args:
        filename: Filename of ORCA hessian file.

    .. attribute:: filename

        Path to the ORCA Hessian file

    .. attribute:: energy

        Energy in Hartree.

    .. attribute:: molecule

        The molecule geometry read in the hessian file. Coordinates are read
        in atomic unit and store in the molecule object in atomic units.

    .. attribute:: frequencies

        A list of dict for each frequencies with ::

            {
                "frequency": freq in cm-1,
                "symmetry": symmetry tag
                "r_mass": Reduce mass,
                "f_constant": force constant,
                "IR_intensity": IR Intensity,
                "mode": normal mode
             }

        The normal mode is a 1D vector of dx, dy dz of each atom.

    .. attribute:: hessian

        Matrix of second derivatives of the energy with respect to cartesian
        coordinates in the **input orientation** frame. Need #P in the
        route section in order to be in the output.

    """

    def __init__(self, filename):
        self.filename = filename

        # set all attributes to None, thus it will always exist
        self.molecule = None
        self.frequencies = None
        self.hessian = None
        self.energy = None

        # parse file
        self._parse()

    def _parse(self):
        """ Parse the file and fill in object and attributes """

        with open(self.filename, 'r') as f:
            for line in f:
                #
                # Read energy
                #
                if '$act_energy' in line:
                    self.energy = float(f.readline())

                #
                # Read the hessian matrix
                #
                elif '$hessian' in line:
                    dimension = int(f.readline())
                    matrix = np.zeros((dimension, dimension))
                    while line != '\n':
                        line = f.readline()
                        if '.' not in line:
                            jindex = [int(jj) for jj in line.split()]
                        else:
                            iindex = int(line.split()[0])
                            datas = [float(val) for val in line.split()[1:]]
                            for j, data in enumerate(datas):
                                # print (j, jindex[j], iindex)
                                matrix[iindex, jindex[j]] = data

                    self.hessian = matrix

                #
                # parse normal mode
                #
                elif '$normal_modes' in line:
                    dimension = int(f.readline().split()[0])
                    normal_modes = np.zeros((dimension, dimension))
                    end = False
                    while not end:
                        jindex = [int(j) for j in f.readline().split()]
                        for i in range(dimension):
                            val = [float(v) for v in f.readline().split()[1:]]
                            for jval, j in enumerate(jindex):
                                normal_modes[i, j] = val[jval]
                        if jindex[-1] == dimension - 1:
                            end = True

                #
                # Parse geometry
                #
                elif '$atoms' in line:
                    atoms = []
                    coords = []

                    natoms = int(next(f))
                    for i in range(natoms):
                        line = next(f)
                        data = line.split()
                        atoms.append(Element(data[0]))
                        coords.append([float(x) for x in data[2:5]])
                    self.molecule = Molecule(atoms, coords)

                #
                # Parse IR data
                #
                elif '$ir_spectrum' in line:
                    frequencies = []
                    ntfreqs = int(f.readline())  # aussi int(next(f))
                    ifreq = 0
                    for i in range(ntfreqs):
                        data = f.readline().split()
                        frequencies.append({"frequency": float(data[0]),
                                            "r_mass": None,
                                            "f_constant": None,
                                            "IR_intensity": float(data[1]),
                                            "symmetry": None,
                                            "mode": []})

        # add normal modes to frequencies dict
        for ifreq, freq in enumerate(frequencies):
            freq["mode"] = normal_modes[:, ifreq]
        self.frequencies = frequencies

[docs]    def symmetrize_hessian(self, inplace=False):
        """
        Return a symmetrized hessian matrix.

        Args:
            inplace (bool): if True, do operation inplace and return None.
        """
        matrix = (self.hessian + self.hessian.T) / 2
        if inplace:
            self.hessian = matrix
            return None
        else:
            return matrix

[docs]    def get_mol_ang(self, inplace=False):
        """
        Return a new Molecule object with coordinate in angstrom

        Args:
            inplace (bool): if True, do operation inplace and return None.
        """
        newmol = Molecule(self.molecule.species,
                          self.molecule.cart_coords * BOHR_TO_ANGS)
        if inplace:
            self.molecule = newmol
            return None
        else:
            return newmol


[docs]class OrcaVPT2:
    """
    Parser for ORCA .vpt2 file.

    Args:
        filename: Filename of ORCA .vpt2 file.

    .. attribute:: filename

        Path to the ORCA .vpt2 file

    .. attribute:: settings

        VPT2 calculation settings.

    .. attribute:: molecule

        The molecule geometry read in the hessian file. Coordinates are read
        in atomic unit and store in the molecule object in atomic units.

    .. attribute:: hessian

        Hessian matrix in Eh/(bohr**2). Array shape (3N, 3N)

    .. attribute:: dipole_derivative

        Dipole derivatives in (Eh * bohr)^(1/2). Array shape (3N, 3)

    .. attribute:: cubic_terms

        Cubic force field in cm-1. Array shape (3N-6, 3N-6, 3N-6)

    .. attribute:: semi_quartic_terms

        Semi-quartic force field in cm-1. Only [i][j][k][k] terms are provided.
        The last index is not repeated. Thus the shape of the array is
        (3N-6, 3N-6, 3N-6)

    """

    def __init__(self, filename):
        self.filename = filename

        # set all attributes to None, thus it will always exist
        self.settings = None
        self.molecule = None
        self.hessian = None
        self.dipole_derivative = None
        self.cubic_terms = None
        self.semi_quartic_terms = None

        # parse file
        self._parse()

    def _parse(self):
        """ Parse the file and fill in object and attributes """

        num_patt = re.compile(r"\d\.\d+[deDE]?[+-]?\d+")
        hess_patt = re.compile(r"^\s+(\d+)\s+(\d+)\s+([+-]?\d+\.\d+)")
        cubic_patt = re.compile(r"^\s+(\d+)\s+(\d+)\s+(\d+)\s+([+-]?\d+\.\d+)")

        with open(self.filename, 'r') as f:
            for line in f:
                #
                # Read energy
                #
                if '# VPT2 settings' in line:
                    self.settings = dict()
                    line = f.readline()
                    while line.strip() != "":
                        key, val = line.split("=")
                        if num_patt.match(val):
                            val = float(val)
                        elif re.match(r"\d+", val):
                            val = int(val)
                        self.settings[key] = val
                        line = f.readline()

                if "# Atomic coordinates in Angstroem" in line:
                    line = f.readline()
                    species = list()
                    coords = list()
                    line = f.readline()
                    while re.match(r"^\s+\w{1,2}\s+\d+\s+\d+\.\d+\s+[+-]?\d+\.\d+", line):
                        data = line.split()
                        species.append(data[0])
                        coords.append([float(val) for val in data[3:6]])
                        line = f.readline()
                    self.molecule = Molecule(species, coords)

                if "# Hessian[i][j] in Eh/(bohr**2)" in line:
                    ni, nj = [int(val) for val in f.readline().split()]
                    self.hessian = np.zeros((ni, nj))
                    line = f.readline()
                    while hess_patt.match(line):
                        i, j, val = hess_patt.match(line).groups()
                        self.hessian[int(i), int(j)] = float(val)
                        line = f.readline()

                if "# Dipole Derivatives[i][j] in (Eh*bohr)^1/2" in line:
                    ni, nj = [int(val) for val in f.readline().split()]
                    self.dipole_derivative = np.zeros((ni, nj))
                    line = f.readline()
                    while hess_patt.match(line):
                        i, j, val = hess_patt.match(line).groups()
                        self.dipole_derivative[int(i), int(j)] = float(val)
                        line = f.readline()

                if "# Cubic[i][j][k] force field in 1/cm" in line:
                    ni, nj, nk = [int(val) for val in f.readline().split()]
                    self.cubic_terms = np.zeros((ni, nj, nk))
                    line = f.readline()
                    while cubic_patt.match(line):
                        i, j, k, val = cubic_patt.match(line).groups()
                        self.cubic_terms[int(i), int(j), int(k)] = float(val)
                        line = f.readline()

                if "# Semi-quartic[i][j][k][k] force field in 1/cm" in line:
                    ni, nj, nk = [int(val) for val in f.readline().split()]
                    self.semi_quartic_terms = np.zeros((ni, nj, nk))
                    line = f.readline()
                    while cubic_patt.match(line):
                        i, j, k, val = cubic_patt.match(line).groups()
                        self.semi_quartic_terms[int(i), int(j), int(k)] = float(val)
                        line = f.readline()

[docs]    def symmetrize_hessian(self, inplace=False):
        """
        Return a symmetrized hessian matrix.

        Args:
            inplace (bool): if True, do operation inplace and return None.
        """
        matrix = (self.hessian + self.hessian.T) / 2
        if inplace:
            self.hessian = matrix
            return None
        else:
            return matrix

[docs]    def get_mol_ang(self, inplace=False):
        """
        Return a new Molecule object with coordinate in angstrom

        Args:
            inplace (bool): if True, do operation inplace and return None.
        """
        newmol = Molecule(self.molecule.species,
                          self.molecule.cart_coords * BOHR_TO_ANGS)
        if inplace:
            self.molecule = newmol
            return None
        else:
            return newmol


[docs]class OrcaOutfile:
    """
    Parser for ORCA output file.

    Args:
        filename: Filename of Orca output file.

    .. attribute:: filename

        Path to the ORCA Output file

    .. attribute:: number_electrons

        Total number of electrons in the system

    .. attribute:: number_basis_functions

        Number of contracted basis functions in the main gaussian basis set.

    .. attribute:: nuclear_repulsion

        Nuclear repulsion energy of the last geometry.

    .. attribute:: charge

        Total charge of the molecule.

    .. attribute:: spin_multiplicity

        Spin multiplicity of the molecule.

    .. attribute:: mul_charges

        List of the last Mulliken atomic charges. If a geometry optimization
        is done, only the last charges are saved.

    .. attribute:: loe_charges

        List of the Loewdin atomic charges. If a geometry optimization is done,
        only the last charges are saved.

    .. attribute:: hirshfeld_charges

        List of the Hirshfeld atomic charges. If a geometry optimization is done,
        only the last charges are saved.

    .. attribute:: esp_charges

        List of the ESP charges fitted on the electrostatic potential. Use CHELPG
        in the input file to get that charges.

    .. attribute:: mul_spin_pop

        List of the last Mulliken spin populations. If a geometry optimization
        is done, only the last spin populations are saved.

    .. attribute:: loe_spin_pop

        List of the last Loewdin spin populations. If a geometry optimization
        is done, only the last spin populations are saved.

    .. attribute:: hirshfeld_spin_pop

        List of the last Hirshfeld spin populations. If a geometry optimization
        is done, only the last spin populations are saved.

    .. attribute:: orca_input

        An OrcaInput instance created from the input file read on the Orca
        outfile. The considered structure is the last one.

    .. attribute:: bond_orders

        Dict of bond order values read in the output file such as:
        {(0, 1): 0.8709, (1, 6): 1.234, ...}

    .. attribute:: dipole_moment

        Components of the dipole moment in a.u.

    .. attribute:: dipole

        Magnitude of the dipole moment in Debye

    .. attribute:: structures

        List of all structures in the calculation as pymatgen.Molecule object.
        The last geometry for the last additionnal SCF calculation is included.

    .. attribute:: is_spin

        True if the calculation is a unrestricted calculation (UKS) in INPUT. False
        if the calculation is a restricted calculation.

    .. attribute:: optimization_converged

        None if Opt is not in INPUT. True if optimization converge else False.

    .. attribute:: normal_termination

        True if Orca terminated normally.

    .. attribute: thermochemistry

        bool, if True, a thermocemistry calculation was done.

    .. attribute:: temperature

        Temperature, in Kelvin, for the calculations of thermochemical quantities

    .. attribute:: pressure

        Pressure, in atmosphere, for the calculations of thermochemical quantities

    .. attribute:: mass

        Total mass, in AMU (g.mol-1), for the calculations of themochemical quantities

    .. attribute:: frequencies

        List of vibrational frequencies in cm-1

    .. attribute:: thermo_data

        dictionnary of all thermochemistry data read in output file.
        ['electronic energy', 'zero point energy', 'thermal vibrational correction',
         'thermal rotational correction', 'thermal translational correction',
         'inner energy', 'thermal enthalpy correction', 'total enthalpy',
         'electronic entropy', 'vibrational entropy', 'rotational entropy',
         'translational entropy', 'total entropy correction',
         'final gibbs free enthalpy', 'g-e(el)', 'entropy']

    .. attribute:: mo_energies

        Dict of the molecular orbital energies for each spin (if relevant) such as
        {Spin.up: energies, Spin.down: energies}

    .. attribute:: mo_occupations

        Dict of molecular orbital occupations for each spin (if relevant) such as
        {Spin.up: occupations, Spin.down: occupations}

    .. attribute:: mo_matrix

        Dict of arrays of molecular orbital coefficients for each spin (if
        relevant). The shapes of the arrays are
        (number_basis_functions x number_basis_functions).
        {Spin.up: coefficients, Spin.down: coefficients}

    """

    def __init__(self, filename):
        self.filename = filename

        # initialisation of all attributes, thus they will always exist
        self.mul_charges = None
        self.mul_spin_pop = None
        self.loe_charges = None
        self.loe_spin_pop = None
        self.esp_charges = None
        self.hirshfeld_charges = None
        self.hirshfeld_spin_pop = None
        self.bond_orders = None
        self.dipole_moment = None
        self.dipole = None
        self.energies = list()
        self.structures = list()
        self.charge = None
        self.spin_multiplicity = None
        self.is_spin = False
        self.optimization_converged = None
        self.normal_termination = False
        self.scf_converge = False
        self.number_electrons = None
        self.number_basis_functions = None
        self.nuclear_repulsion = None

        self.thermochemistry = False
        self.temperature = None
        self.pressure = None
        self.mass = None
        self.frequencies = None
        self.thermo_data = None

        self.mo_energies = None
        self.mo_occupations = None
        self.mo_matrix = None

        # parse file
        self._parse()

    @property
    def final_energy(self):
        """ Last SCF energy """
        return self.energies[-1]

    @property
    def initial_structure(self):
        """ first geometry """
        return self.structures[0]

    @property
    def final_structure(self):
        """ last geometry """
        return self.structures[-1]

    @property
    def gibbs_free_enthalpy(self):
        """ Gibbs free enthalpy in kcal.mol-1 """
        if self.thermochemistry:
            return self.thermo_data["final gibbs free enthalpy"][1]
        else:
            print("WARNING: no thermochemistry data available in " + self.filename)
            return None

    @property
    def homo(self):
        """ HOMO: Highest Occupied Molecular Orbital """

        idx = np.where(self.mo_occupations[Spin.up] > 0.)[0][-1]
        homo = self.mo_energies[Spin.up][idx]

        if self.is_spin:
            idx_down = np.where(self.mo_occupations[Spin.down] > 0)[0][-1]
            homo_down = self.mo_energies[Spin.down][idx_down]
            if homo_down > homo:
                homo = homo_down
                idx = idx_down

        return idx, homo

    @property
    def lumo(self):
        """ LUMO: Lowest Unoccupied Molecular Orbital """

        idx = np.where(self.mo_occupations[Spin.up] < 1.)[0][0]
        lumo = self.mo_energies[Spin.up][idx]

        if self.is_spin:
            idx_down = np.where(self.mo_occupations[Spin.down] < 1.)[0][0]
            lumo_down = self.mo_energies[Spin.down][idx_down]
            if lumo_down < lumo:
                lumo = lumo_down
                idx = idx_down

        return idx, lumo

    @property
    def mo_dataframe(self):
        """
        A dict of data frames with MO coefficients such as:

            {Spin.up: dataframe, Spin.down: dataframe}

        Each column of the data frame is a MO. Each row contains the coefficients
        for an atom, in a specfic AO.
        """
        ao_type = [ao[1] for ao in self._ao_name]
        index = pd.MultiIndex.from_arrays([self._at_index, self._elements,
                                           self._ao_name, ao_type],
                                          names=["iat", "Element", "AO", "shell"])
        columns = ["MO_%d" % (i + 1) for i in range(self.number_basis_functions)]
        if self.is_spin:
            return {
                Spin.up: pd.DataFrame(self.mo_matrix[Spin.up], index=index,
                                      columns=columns),
                Spin.down: pd.DataFrame(self.mo_matrix[Spin.down], index=index,
                                        columns=columns)
            }
        else:
            return {Spin.up: pd.DataFrame(self.mo_matrix[Spin.up], index=index,
                                          columns=columns)}

    def _parse(self):
        """ Parse the file and fill in object and attributes """

        kw_patt = re.compile(r"^\|\s+\d+>\s!\s(.+)")
        input_patt = re.compile(r"^\|\s+\d+>\s+(.+)")
        q_patt = re.compile(r"^\s*\d+\s*([a-zA-Z]+)\s*:\s+([+-]?\d+\.\d+)\s*([+-]?\d+\.\d+)?")
        float_patt = re.compile(r"\s*([+-]?\d+\.\d+)")
        bo_patt = re.compile(r"B\(\s*(\d+)-(\S{1,2})\s*,\s*(\d+)-(\S{1,2})\s*\)\s+:\s*(\d+\.\d+)")
        dipole_patt = re.compile(r"^Total Dipole Moment\s+:\s+([+-]?\d+\.\d+)"
                                 r"\s+([+-]?\d+\.\d+)\s+([+-]?\d+\.\d+)")
        hirsh_patt = re.compile(r"\s+\d+\s+[a-zA-Z]{1,2}\s+([+-]?\d+\.\d+)"
                                r"\s+[+-]?\d+\.\d+")
        scf_patt = re.compile(r"FINAL SINGLE POINT ENERGY\s+([+-]?\d+\.\d+)")
        cart_patt = re.compile(r"^\s+([a-zA-Z]{1,2})\s+([+-]?\d+\.\d+)"
                               r"\s+([+-]?\d+\.\d+)\s+([+-]?\d+\.\d+)")
        scf_cvg_patt = re.compile(r"^\s+\*\s+SCF CONVERGED AFTER\s+(\d+)\s+CYCLES")
        mo_patt = re.compile(r"^\s*(?P<iat>\d+)(?P<element>[a-zA-Z]+)"
                             r"\s+(?P<AO>[\w+-]{1,6})"
                             r"(?P<coeffs>(\s*[+-]?\d+\.\d+){1,6})")
        thermo_patt = re.compile(
            r"^(?P<name>.+)\.\.\.\s+(?P<uaval>.[+-]?\d+\.\d+)\sEh"
            r"(\s+(?P<kcalval>.[+-]?\d+\.\d+)\skcal/mol)?")
        freq_patt = re.compile(r"freq\.\s+([+-]?\d+\.\d+)\s+E\(vib\)\s+")

        with open(self.filename, 'r', encoding="utf-8") as f:
            for line in f:
                #
                # Read input file
                #
                if "INPUT FILE" in line:
                    # set up list to store input parameters
                    input_parameters = list()
                    blocks = ""
                    self.input = ""

                    line = f.readline()
                    while "**END OF INPUT**" not in line:
                        if "#" in line:
                            line = f.readline()
                            continue
                        elif kw_patt.search(line):
                            data = kw_patt.findall(line)[0]
                            self.input += "! " + data + "\n"
                            input_parameters += [k.lower() for k in data.split()]
                        elif re.match(r"^\|\s+\d+>\s*(\*.*)", line):
                            self.input += re.match(r"^\|\s+\d+>\s*(\*.*)", line).group(1)
                            self.input += "\n"
                        elif input_patt.match(line):
                            data = input_patt.match(line).group(1) + "\n"
                            self.input += data
                            blocks += data
                        line = f.readline()

                    if "opt" in input_parameters:
                        self.optimization_converged = False

                elif re.match(r"^\s+Hartree-Fock type\s+HFTyp\s+\.{4}", line):
                    if line.split()[-1] == "UHF":
                        self.is_spin = True

                elif re.match(r"^\s+Number of Electrons\s+NEL\s+\.{4}", line):
                    self.number_electrons = int(line.split()[-1])

                elif re.match(r"^\s+Basis Dimension\s+Dim\s+\.{4}", line):
                    self.number_basis_functions = int(line.split()[-1])

                elif re.match(r"^\s+Total Charge\s+Charge\s+\.{4}", line):
                    self.charge = int(line.split()[-1])

                elif re.match(r"^\s+Multiplicity\s+Mult\s+\.{4}", line):
                    self.spin_multiplicity = int(line.split()[-1])

                elif re.match(r"^\s+Nuclear Repulsion\s+ENuc\s+\.{4}", line):
                    self.nuclear_repulsion = float(line.split()[-2])

                elif scf_patt.match(line):
                    energy = float(scf_patt.findall(line)[0])
                    self.energies.append(energy)

                elif scf_cvg_patt.match(line):
                    self.scf_converge = True

                elif "SCF ITERATIONS" in line:
                    # start new SCF step
                    self.scf_converge = False

                elif "ORBITAL ENERGIES" == line.strip():
                    f.readline()
                    f.readline()
                    f.readline()

                    energies = list()
                    occupations = list()
                    for _ in range(self.number_basis_functions):
                        line = f.readline()
                        energies.append(float(line.split()[2]))
                        occupations.append(float(line.split()[1]))
                    self.mo_energies = {Spin.up: np.array(energies)}
                    self.mo_occupations = {Spin.up: np.array(occupations)}

                    if self.is_spin:
                        f.readline()
                        f.readline()
                        f.readline()

                        energies = list()
                        occupations = list()
                        for _ in range(self.number_basis_functions):
                            line = f.readline()
                            energies.append(float(line.split()[2]))
                            occupations.append(float(line.split()[1]))
                        self.mo_energies[Spin.down] = np.array(energies)
                        self.mo_occupations[Spin.down] = np.array(occupations)

                elif "MOLECULAR ORBITALS" == line.strip():
                    spins = [Spin.up]
                    if self.is_spin:
                        spins.append(Spin.down)

                    f.readline()

                    self.mo_matrix = {}
                    for spin in spins:
                        self.mo_matrix[spin] = np.zeros((self.number_basis_functions,
                                                         self.number_basis_functions))

                        n_mo = 0
                        while n_mo < self.number_basis_functions - 1:
                            line = f.readline()  # MO index
                            orbital_numbers = [int(i) for i in line.split()]
                            n_number = len(orbital_numbers)
                            f.readline()  # MO energies
                            f.readline()  # MO occupation
                            f.readline()  # dash line

                            self._at_index = list()
                            self._ao_name = list()
                            self._elements = list()

                            for ibf in range(self.number_basis_functions):
                                data = mo_patt.match(f.readline()).groupdict()
                                coeffs = float_patt.findall(data.pop("coeffs"))
                                self.mo_matrix[spin][ibf, n_mo:n_mo + n_number] = \
                                    [float(coeff) for coeff in coeffs]
                                self._elements.append(data["element"])
                                self._at_index.append(int(data["iat"]))
                                self._ao_name.append(data["AO"])

                            n_mo += n_number

                        f.readline()  # read blank line between spins

                elif "MULLIKEN ATOMIC CHARGES" in line:
                    f.readline()
                    line = f.readline()
                    self.mul_charges = list()
                    if self.is_spin:
                        self.mul_spin_pop = list()
                    while q_patt.match(line):
                        match = q_patt.match(line)
                        self.mul_charges.append(float(match.group(2)))
                        if self.is_spin:
                            self.mul_spin_pop.append(float(match.group(3)))
                        line = f.readline()

                elif "LOEWDIN ATOMIC CHARGES" in line:
                    f.readline()
                    line = f.readline()
                    self.loe_charges = list()
                    if self.is_spin:
                        self.loe_spin_pop = list()
                    while q_patt.search(line):
                        match = q_patt.match(line)
                        self.loe_charges.append(float(match.group(2)))
                        if self.is_spin:
                            self.loe_spin_pop.append(float(match.group(3)))
                        line = f.readline()

                elif "CHELPG Charges" in line:
                    f.readline()
                    line = f.readline()
                    self.esp_charges = []
                    while q_patt.search(line):
                        vals = [float(c) for c in float_patt.findall(line)]
                        self.esp_charges.append(vals[0])
                        line = f.readline()

                elif "HIRSHFELD ANALYSIS" in line:
                    [f.readline() for _ in range(6)]
                    line = f.readline()
                    self.hirshfeld_charges = list()
                    while hirsh_patt.match(line):
                        val = float(hirsh_patt.findall(line)[0])
                        self.hirshfeld_charges.append(val)
                        line = f.readline()

                elif "Mayer bond orders larger than 0.1" in line:
                    self.bond_orders = {}
                    line = f.readline()
                    while bo_patt.match(line):
                        bo_line = bo_patt.findall(line)
                        for iat, _, jat, _, val in bo_line:
                            key = (int(iat), int(jat))
                            self.bond_orders[key] = float(val)
                        line = f.readline()

                elif dipole_patt.match(line):
                    dipole = [float(val) for val in dipole_patt.findall(line)[0]]
                    self.dipole_moment = np.array(dipole)
                    f.readline()
                    f.readline()
                    self.dipole = float(f.readline().split()[-1])

                elif "CARTESIAN COORDINATES (ANGSTROEM)" in line:
                    f.readline()
                    line = f.readline()
                    species = list()
                    coords = list()
                    while cart_patt.match(line):
                        match = cart_patt.match(line)
                        species.append(match.group(1))
                        coords.append([float(val) for val in match.group(2, 3, 4)])
                        line = f.readline()
                    if self.charge and self.spin_multiplicity:
                        self.structures.append(Molecule(species, coords,
                                                        charge=self.charge,
                                                        spin_multiplicity=self.spin_multiplicity))
                    else:
                        self.structures.append(Molecule(species, coords))

                elif "THERMOCHEMISTRY AT " in line:
                    self.thermochemistry = True
                    self.thermo_data = dict()
                    self.frequencies = list()

                elif "THE OPTIMIZATION HAS CONVERGED" in line:
                    self.optimization_converged = True

                elif "****ORCA TERMINATED NORMALLY****" in line:
                    self.normal_termination = True

                elif self.thermochemistry:
                    if "Temperature" in line:
                        self.temperature = float(line.split()[-2])
                    elif "Pressure" in line:
                        self.pressure = float(line.split()[-2])
                    elif "Total Mass" in line:
                        self.mass = float(line.split()[-2])
                    elif thermo_patt.match(line):
                        gdict = thermo_patt.match(line).groupdict()
                        name = gdict["name"].strip().lower()
                        uaval = float(gdict["uaval"])
                        kcalval = float(gdict["kcalval"]) if gdict["kcalval"] else uaval * UA_TO_KCAL
                        self.thermo_data[name] = (uaval, kcalval)
                    elif freq_patt.match(line):
                        freqval = float(freq_patt.match(line).group(1))
                        self.frequencies.append(freqval)
                    elif "Total thermal energy" in line:
                        value = float(line.split()[-2])
                        self.thermo_data["inner energy"] = (value, value * UA_TO_KCAL)

                    if "G-E(el)" in line:
                        # remove temporary thermochemistry read
                        self.thermochemistry = False

        # clean up thermochemistry data
        if self.thermo_data:
            self.thermochemistry = True
            self.thermo_data.pop("total free energy")  # this is inner energy
            self.thermo_data["entropy"] = self.thermo_data.pop("final entropy term")

        # set up an OrcaInput instance with the LAST geometry
        self.orca_input = OrcaInput(self.final_structure, charge=self.charge,
                                    spin_multiplicity=self.spin_multiplicity,
                                    input_parameters=input_parameters,
                                    blocks=[blocks])


class OrcaEnGradfile:
    """
    Parser for ORCA EnGrad file.

    Args:
        filename: Filename of Orca engrad file.

    .. attribute:: filename

        Path to the ORCA engrad file

    .. attribute:: grad

        Gradient of the molecule in units of Hartree/Bohr

    """

    def __init__(self, filename):
        self.filename = filename

        # initialisation of all attributes, thus they will always exist
        self.grad = list()

        # parse file
        self._parse()

    @property
    def gradient(self):
        """ gradient """
        return self.grad

    def _parse(self):
        """ Parse the file and fill in object and attributes """

        with open(self.filename, 'r', encoding="utf-8") as f:
            for line in f:
                #
                # Read input file
                #
                if "# The current gradient in Eh/bohr" in line:
                    line = f.readline()
                    line = f.readline()

                    while "#" not in line:
                        self.grad.append(float(line.rstrip()))
                        line = f.readline()

            self.grad = np.array(self.grad).reshape(int(len(self.grad) / 3), 3)


class OrcaRun(OrcaOutfile):
    """
    A class to gather all results from an Orca calculations. When instanciated
    the class will try to read all available files according to the given basename.

    Args:
        basename (str): basename of Orca output files of the current job.
    """

    def __init__(self, basename):
        self.basename = basename

        # read output file if it exists
        outfile = pathlib.Path(self.basename + ".out")
        if outfile.is_file():
            super().__init__(outfile)
        else:
            raise FileNotFoundError("File %s.out not found." % basename +
                                    "Check your ORCA calculation.")

        # read hessian file if it exists
        hessian = pathlib.Path(self.basename + ".hess")
        if hessian.is_file():
            self.hessian = OrcaHessian(hessian)
        else:
            self.hessian = None

        # read engrad file if it exists
        engrad = pathlib.Path(self.basename + ".engrad")
        if engrad.is_file():
            self.engrad = OrcaEnGradfile(engrad)
        else:
            self.engrad = None
Source code for orca

Table of Contents

Search